#!/usr/bin/env perl
use v5.10;
use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;

use Getopt::Long;
use Pod::Usage;

use if $^O eq 'MSWin32', 'Win32::Console::ANSI';
use Term::ANSIColor;

use constant {
    NULL              => "\x00",
    BSON_TYPE         => "C",
    BSON_ENAME        => "Z*",
    BSON_TYPE_NAME    => "CZ*",
    BSON_DOUBLE       => "d",
    BSON_STRING       => "l/A",
    BSON_BOOLEAN      => "C",
    BSON_REGEX        => "Z*Z*",
    BSON_JSCODE       => "",
    BSON_INT32        => "l",
    BSON_INT64        => "q",
    BSON_TIMESTAMP    => "q",
    BSON_CODE_W_SCOPE => "l",
    BSON_REMAINING    => 'a*',
    BSON_SKIP_4_BYTES => 'x4',
    BSON_OBJECTID     => 'a12',
    BSON_BINARY_TYPE  => 'C',
    BSON_CSTRING      => 'Z*',
    BSON_BYTES        => 'a*'
};

my $BOLD = $^O eq 'MSWin32' ? "bold " : "";

# minimum field size
my %FIELD_SIZES = (
    0x01 => 8,
    0x02 => 5,
    0x03 => 5,
    0x04 => 5,
    0x05 => 5,
    0x06 => 0,
    0x07 => 12,
    0x08 => 1,
    0x09 => 8,
    0x0A => 0,
    0x0B => 2,
    0x0C => 17,
    0x0D => 5,
    0x0E => 5,
    0x0F => 14,
    0x10 => 4,
    0x11 => 8,
    0x12 => 8,
    0x7F => 0,
    0xFF => 0,
);

sub main {
    my ( $hex, $file, $help );
    GetOptions(
        "file=s" => \$file,
        "x"      => \$hex,
        "help|h" => \$help,
    ) or die("Error in command line args");
    pod2usage( { -exitval => 2, -verbose => 2, } ) if $help;

    if ( $file ) {
        dump_file($file);
    }
    else {
        dump_stdin($hex);
    }
}

sub dump_stdin {
    my $hex = shift;
    while ( defined( my $bson = <STDIN> ) ) {
        chomp $bson;
        if ( !length($bson) ) {
            print_error("[ no document ]\n");
            next;
        }
        # in -x mode, treat leading # as a comment
        if ( $hex && index( $bson, "#" ) == 0 ) {
            say $bson;
            next;
        }
        $bson =~ s[ ][]g if $hex;
        $bson = pack( "H*", $bson ) if $hex;
        dump_document( \$bson );
        print "\n";
    }
}

sub dump_file {
    my $file = shift;
    open my $fh, "<", $file;
    binmode($fh);
    my $data = do { local $/; <$fh> };
    while ( length $data ) {
        my $len = unpack( BSON_INT32, $data );
        my $bson = substr($data,0,$len,'');
        dump_document(\$bson);
        print "\n";
    }
}

sub dump_document {
    my ( $ref, $is_array ) = @_;
    print $is_array ? " [" : " {" if defined $is_array;
    dump_header($ref);
    1 while dump_field($ref);
    print_error( " " . unpack( "H*", $$ref ) ) if length($$ref);
    print $is_array ? " ]" : " }" if defined $is_array;
    return;
}

sub dump_header {
    my ($ref) = @_;

    my $len = get_length( $ref, 4 );
    return unless defined $len;

    if ( $len < 5 || $len < length($$ref) + 4 ) {
        print_length( $len, 'red' );
    }
    else {
        print_length( $len, 'blue' );
    }
}

sub dump_field {
    my ($ref) = @_;

    # detect end of document
    if ( length($$ref) < 2 ) {
        if ( length($$ref) == 0 ) {
            print_error(" [missing terminator]");
        }
        else {
            my $end = substr( $$ref, 0, 1, '' );
            print_hex( $end, $end eq NULL ? 'blue' : 'red' );
        }
        return;
    }

    # unpack type
    my $type = unpack( BSON_TYPE, substr( $$ref, 0, 1, '' ) );

    if ( !exists $FIELD_SIZES{$type} ) {
        print_type( $type, 'red' );
        return;
    }

    print_type($type);

    # check for key termination
    my $key_end = index( $$ref, NULL );
    return if $key_end == -1;

    # unpack key
    my $key = unpack( BSON_CSTRING, substr( $$ref, 0, $key_end + 1, '' ) );
    print_key($key);

    # Check if there is enough data to complete field for this type
    # This is greedy, so it checks length, not length -1
    my $min_size = $FIELD_SIZES{$type};
    return if length($$ref) < $min_size;

    # fields without payload: 0x06, 0x0A, 0x7F, 0xFF
    return 1 if $min_size == 0;

    # document or array
    if ( $type == 0x03 || $type == 0x04 ) {
        my ($len) = unpack( BSON_INT32, $$ref );
        my $doc = substr( $$ref, 0, $len, '' );
        dump_document( \$doc, $type == 0x04 );
        return 1;
    }

    # fixed width fields
    if (   $type == 0x01
        || $type == 0x07
        || $type == 0x09
        || $type == 0x10
        || $type == 0x11
        || $type == 0x12 )
    {
        my $len = ( $type == 0x10 ? 4 : $type == 0x07 ? 12 : 8 );
        print_hex( substr( $$ref, 0, $len, '' ) );
        return 1;
    }

    # boolean
    if ( $type == 0x08 ) {
        my $bool = substr( $$ref, 0, 1, '' );
        print_hex( $bool, ( $bool eq "\x00" || $bool eq "\x01" ) ? 'green' : 'red' );
        return 1;
    }

    # binary field
    if ( $type == 0x05 ) {
        my $len = get_length( $ref, -1 );
        my $subtype = substr( $$ref, 0, 1, '' );

        if ( !defined($len) ) {
            print_hex($subtype);
            return;
        }

        my $binary = substr( $$ref, 0, $len, '' );

        print_length($len);
        print_hex($subtype);

        if ( $subtype eq "\x02" ) {
            my $bin_len = get_length( \$binary );
            if ( !defined($bin_len) ) {
                print_hex( $binary, 'red' );
                return;
            }
            if ( $bin_len != length($binary) ) {
                print_length( $bin_len, 'red' );
                print_hex( $binary, 'red' );
                return;
            }
        }

        print_hex($binary) if length($binary);
        return 1;
    }

    # string or symbol or code
    if ( $type == 0x02 || $type == 0x0e || $type == 0x0d ) {
        my ( $len, $string ) = get_string($ref);
        return unless defined $len;

        print_length( $len, 'cyan' );
        print_string($string);
        return 1;

    }

    # regex 0x0B
    if ( $type == 0x0B ) {
        my ( $pattern, $flag ) = unpack( BSON_CSTRING . BSON_CSTRING, $$ref );
        substr( $$ref, 0, length($pattern) + length($flag) + 2, '' );
        print_string($pattern);
        print_string($flag);
        return 1;
    }

    # code with scope 0x0F
    if ( $type == 0x0F ) {
        my $len = get_length( $ref, 4 );
        return unless defined $len;

        # len + string + doc minimum size is 4 + 5 + 5
        if ( $len < 14 ) {
            print_length( $len, 'red' );
            return;
        }

        print_length($len);

        my $cws = substr( $$ref, 0, $len - 4, '' );

        my ( $strlen, $string ) = get_string( \$cws );

        if ( !defined $strlen ) {
            print_hex( $cws, 'red' );
            return;
        }

        print_length($strlen);
        print_string($string);

        dump_document( \$cws, 0 );

        return 1;
    }

    # dbpointer 0x0C
    if ( $type == 0x0C ) {
        my ( $len, $string ) = get_string($ref);
        return unless defined $len;

        print_length($len);
        print_string($string);

        # Check if there are 12 bytes (plus terminator) or more
        return if length($$ref) < 13;

        my $oid = substr( $$ref, 0, 12, '' );
        print_hex($oid);

        return 1;
    }

    die "Shouldn't reach here";
}

sub get_length {
    my ( $ref, $adj ) = @_;
    $adj ||= 0;
    my $len = unpack( BSON_INT32, substr( $$ref, 0, 4, '' ) );
    return unless defined $len;

    # check if requested length is too long
    if ( $len < 0 || $len > length($$ref) + $adj ) {
        print_length( $len, 'red' );
        return;
    }

    return $len;
}

sub get_string {
    my ($ref) = @_;

    my $len = get_length($ref);
    return unless defined $len;

    # len must be at least 1 for trailing 0x00
    if ( $len == 0 ) {
        print_length( $len, 'red' );
        return;
    }

    my $string = substr( $$ref, 0, $len, '' );

    # check if null terminated
    if ( substr( $string, -1, 1 ) ne NULL ) {
        print_length($len);
        print_hex( $string, 'red' );
        return;
    }

    # remove trailing null
    chop($string);

    # try to decode to UTF-8
    if ( !utf8::decode($string) ) {
        print_length($len);
        print_hex( $string . "\x00", 'red' );
        return;
    }

    return ( $len, $string );
}

sub print_error {
    my ($text) = @_;
    print colored( ["${BOLD}red"], $text );
}

sub print_type {
    my ( $type, $color ) = @_;
    $color ||= 'magenta';
    print colored( ["$BOLD$color"], sprintf( " %02x", $type ) );
}

sub print_key {
    my ($string) = @_;
    print_string( $string, 'yellow' );
}

sub print_string {
    my ( $string, $color ) = @_;
    $color ||= 'green';
    $string =~ s{([^[:graph:]])}{sprintf("\\x%02x",ord($1))}ge;
    print colored( ["$BOLD$color"], qq[ "$string"] . " 00" );
}

sub print_length {
    my ( $len, $color ) = @_;
    $color ||= 'cyan';
    print colored( ["$BOLD$color"], " " . unpack( "H*", pack( BSON_INT32, $len ) ) );
}

sub print_hex {
    my ( $value, $color ) = @_;
    $color ||= 'green';
    print colored( ["$BOLD$color"], " " . uc( unpack( "H*", $value ) ) );
}

main();

__END__

=head1 NAME

bsonview - dump a BSON string with color output showing structure

=head1 SYNOPSIS

    cat file.bson | bsondump

    echo "0500000000" | bsondump -x

=head1 OPTIONS

    -x          input is in hex format (default is 0)
    --help, -h  show help

=head1 USAGE

Reads from C<STDIN> and dumps colored structures to C<STDOUT>.

=head1 AUTHOR

=over 4

=item *

David Golden <david@mongodb.com>

=back

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2016 by MongoDB, Inc..

This is free software, licensed under:

  The Apache License, Version 2.0, January 2004

=cut

=cut
